; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10PLUS,GFX10 %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10PLUS,GFX11 %s

; Returns the constant i1 value 1 using the amdgpu_gfx calling convention.
; On every tested target the expected code materializes the value as 1 in v0
; and returns with s_setpc_b64 through s[30:31]; the gfx1010 and gfx1100
; outputs are identical and therefore share one check prefix.
define amdgpu_gfx i1 @return_i1() #0 {
; GFX9-LABEL: return_i1:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, 1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: return_i1:
; GFX10PLUS:       ; %bb.0: ; %entry
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, 1
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
entry:
  ret i1 1
}

; Calls @return_i1 with the amdgpu_gfx calling convention and discards the
; result. The expected code for each target:
;   - spills v1 to the stack with all lanes enabled (s_xor_saveexec ... -1),
;     then keeps the return address s30/s31 in lanes 0 and 1 of v1 across the
;     call (v_writelane / v_readlane);
;   - loads the callee address through the GOT (return_i1@gotpcrel32) and
;     calls it with s_swappc_b64;
;   - bumps and restores the stack pointer s32 around the call (0x400 on
;     gfx900, 0x200 on gfx1010, 16 on gfx1100).
; gfx1100 differs from the other two targets by using scratch_store_b32 /
; scratch_load_b32 for the spill and s[0:1] for the callee address.
define amdgpu_gfx void @call_i1() #0 {
; GFX9-LABEL: call_i1:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b32 s36, s33
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_xor_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_store_dword v1, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    s_addk_i32 s32, 0x400
; GFX9-NEXT:    s_getpc_b64 s[34:35]
; GFX9-NEXT:    s_add_u32 s34, s34, return_i1@gotpcrel32@lo+4
; GFX9-NEXT:    s_addc_u32 s35, s35, return_i1@gotpcrel32@hi+12
; GFX9-NEXT:    s_load_dwordx2 s[34:35], s[34:35], 0x0
; GFX9-NEXT:    v_writelane_b32 v1, s30, 0
; GFX9-NEXT:    v_writelane_b32 v1, s31, 1
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT:    v_readlane_b32 s31, v1, 1
; GFX9-NEXT:    v_readlane_b32 s30, v1, 0
; GFX9-NEXT:    s_xor_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_load_dword v1, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
; GFX9-NEXT:    s_mov_b32 s33, s36
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: call_i1:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_mov_b32 s36, s33
; GFX10-NEXT:    s_mov_b32 s33, s32
; GFX10-NEXT:    s_xor_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_store_dword v1, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    s_addk_i32 s32, 0x200
; GFX10-NEXT:    s_getpc_b64 s[34:35]
; GFX10-NEXT:    s_add_u32 s34, s34, return_i1@gotpcrel32@lo+4
; GFX10-NEXT:    s_addc_u32 s35, s35, return_i1@gotpcrel32@hi+12
; GFX10-NEXT:    v_writelane_b32 v1, s30, 0
; GFX10-NEXT:    s_load_dwordx2 s[34:35], s[34:35], 0x0
; GFX10-NEXT:    v_writelane_b32 v1, s31, 1
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT:    v_readlane_b32 s31, v1, 1
; GFX10-NEXT:    v_readlane_b32 s30, v1, 0
; GFX10-NEXT:    s_xor_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_load_dword v1, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
; GFX10-NEXT:    s_mov_b32 s33, s36
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: call_i1:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    s_mov_b32 s2, s33
; GFX11-NEXT:    s_mov_b32 s33, s32
; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_store_b32 off, v1, s33 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    s_add_i32 s32, s32, 16
; GFX11-NEXT:    s_getpc_b64 s[0:1]
; GFX11-NEXT:    s_add_u32 s0, s0, return_i1@gotpcrel32@lo+4
; GFX11-NEXT:    s_addc_u32 s1, s1, return_i1@gotpcrel32@hi+12
; GFX11-NEXT:    v_writelane_b32 v1, s30, 0
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT:    v_writelane_b32 v1, s31, 1
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_readlane_b32 s31, v1, 1
; GFX11-NEXT:    v_readlane_b32 s30, v1, 0
; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_load_b32 v1, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    s_add_i32 s32, s32, -16
; GFX11-NEXT:    s_mov_b32 s33, s2
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
entry:
  call amdgpu_gfx i1 @return_i1()
  ret void
}

; Returns the constant i16 value 10 using the amdgpu_gfx calling convention.
; On every tested target the expected code moves 10 into v0 and returns with
; s_setpc_b64 through s[30:31]; gfx1010 and gfx1100 share one check prefix
; because their output is identical.
define amdgpu_gfx i16 @return_i16() #0 {
; GFX9-LABEL: return_i16:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, 10
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: return_i16:
; GFX10PLUS:       ; %bb.0: ; %entry
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, 10
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
entry:
  ret i16 10
}

; Calls @return_i16 with the amdgpu_gfx calling convention and discards the
; result. The expected call sequence has the same shape on each target:
; spill v1 with all lanes enabled, stash the return address s30/s31 in lanes
; 0 and 1 of v1, load the callee address through the GOT
; (return_i16@gotpcrel32), call it with s_swappc_b64, then undo everything in
; reverse order before returning. gfx1100 uses scratch_store_b32 /
; scratch_load_b32 for the spill where the other targets use
; buffer_store_dword / buffer_load_dword.
define amdgpu_gfx void @call_i16() #0 {
; GFX9-LABEL: call_i16:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b32 s36, s33
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_xor_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_store_dword v1, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    s_addk_i32 s32, 0x400
; GFX9-NEXT:    s_getpc_b64 s[34:35]
; GFX9-NEXT:    s_add_u32 s34, s34, return_i16@gotpcrel32@lo+4
; GFX9-NEXT:    s_addc_u32 s35, s35, return_i16@gotpcrel32@hi+12
; GFX9-NEXT:    s_load_dwordx2 s[34:35], s[34:35], 0x0
; GFX9-NEXT:    v_writelane_b32 v1, s30, 0
; GFX9-NEXT:    v_writelane_b32 v1, s31, 1
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT:    v_readlane_b32 s31, v1, 1
; GFX9-NEXT:    v_readlane_b32 s30, v1, 0
; GFX9-NEXT:    s_xor_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_load_dword v1, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
; GFX9-NEXT:    s_mov_b32 s33, s36
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: call_i16:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_mov_b32 s36, s33
; GFX10-NEXT:    s_mov_b32 s33, s32
; GFX10-NEXT:    s_xor_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_store_dword v1, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    s_addk_i32 s32, 0x200
; GFX10-NEXT:    s_getpc_b64 s[34:35]
; GFX10-NEXT:    s_add_u32 s34, s34, return_i16@gotpcrel32@lo+4
; GFX10-NEXT:    s_addc_u32 s35, s35, return_i16@gotpcrel32@hi+12
; GFX10-NEXT:    v_writelane_b32 v1, s30, 0
; GFX10-NEXT:    s_load_dwordx2 s[34:35], s[34:35], 0x0
; GFX10-NEXT:    v_writelane_b32 v1, s31, 1
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT:    v_readlane_b32 s31, v1, 1
; GFX10-NEXT:    v_readlane_b32 s30, v1, 0
; GFX10-NEXT:    s_xor_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_load_dword v1, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
; GFX10-NEXT:    s_mov_b32 s33, s36
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: call_i16:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    s_mov_b32 s2, s33
; GFX11-NEXT:    s_mov_b32 s33, s32
; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_store_b32 off, v1, s33 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    s_add_i32 s32, s32, 16
; GFX11-NEXT:    s_getpc_b64 s[0:1]
; GFX11-NEXT:    s_add_u32 s0, s0, return_i16@gotpcrel32@lo+4
; GFX11-NEXT:    s_addc_u32 s1, s1, return_i16@gotpcrel32@hi+12
; GFX11-NEXT:    v_writelane_b32 v1, s30, 0
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT:    v_writelane_b32 v1, s31, 1
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_readlane_b32 s31, v1, 1
; GFX11-NEXT:    v_readlane_b32 s30, v1, 0
; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_load_b32 v1, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    s_add_i32 s32, s32, -16
; GFX11-NEXT:    s_mov_b32 s33, s2
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
entry:
  call amdgpu_gfx i16 @return_i16()
  ret void
}

; Returns the constant vector <i16 1, i16 2> using the amdgpu_gfx calling
; convention. The expected code packs both elements into the single 32-bit
; register v0 as 0x20001 (element 0 in the low half, element 1 in the high
; half) and returns with s_setpc_b64 through s[30:31].
define amdgpu_gfx <2 x i16> @return_2xi16() #0 {
; GFX9-LABEL: return_2xi16:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x20001
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: return_2xi16:
; GFX10PLUS:       ; %bb.0: ; %entry
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, 0x20001
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
entry:
  ret <2 x i16> <i16 1, i16 2>
}

; Calls @return_2xi16 with the amdgpu_gfx calling convention and discards the
; result. The expected call sequence has the same shape on each target:
; spill v1 with all lanes enabled, stash the return address s30/s31 in lanes
; 0 and 1 of v1, load the callee address through the GOT
; (return_2xi16@gotpcrel32), call it with s_swappc_b64, then restore s30/s31,
; v1, the stack pointer s32 and the saved s33 before returning.
define amdgpu_gfx void @call_2xi16() #0 {
; GFX9-LABEL: call_2xi16:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b32 s36, s33
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_xor_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_store_dword v1, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    s_addk_i32 s32, 0x400
; GFX9-NEXT:    s_getpc_b64 s[34:35]
; GFX9-NEXT:    s_add_u32 s34, s34, return_2xi16@gotpcrel32@lo+4
; GFX9-NEXT:    s_addc_u32 s35, s35, return_2xi16@gotpcrel32@hi+12
; GFX9-NEXT:    s_load_dwordx2 s[34:35], s[34:35], 0x0
; GFX9-NEXT:    v_writelane_b32 v1, s30, 0
; GFX9-NEXT:    v_writelane_b32 v1, s31, 1
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT:    v_readlane_b32 s31, v1, 1
; GFX9-NEXT:    v_readlane_b32 s30, v1, 0
; GFX9-NEXT:    s_xor_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_load_dword v1, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
; GFX9-NEXT:    s_mov_b32 s33, s36
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: call_2xi16:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_mov_b32 s36, s33
; GFX10-NEXT:    s_mov_b32 s33, s32
; GFX10-NEXT:    s_xor_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_store_dword v1, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    s_addk_i32 s32, 0x200
; GFX10-NEXT:    s_getpc_b64 s[34:35]
; GFX10-NEXT:    s_add_u32 s34, s34, return_2xi16@gotpcrel32@lo+4
; GFX10-NEXT:    s_addc_u32 s35, s35, return_2xi16@gotpcrel32@hi+12
; GFX10-NEXT:    v_writelane_b32 v1, s30, 0
; GFX10-NEXT:    s_load_dwordx2 s[34:35], s[34:35], 0x0
; GFX10-NEXT:    v_writelane_b32 v1, s31, 1
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT:    v_readlane_b32 s31, v1, 1
; GFX10-NEXT:    v_readlane_b32 s30, v1, 0
; GFX10-NEXT:    s_xor_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_load_dword v1, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
; GFX10-NEXT:    s_mov_b32 s33, s36
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: call_2xi16:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    s_mov_b32 s2, s33
; GFX11-NEXT:    s_mov_b32 s33, s32
; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_store_b32 off, v1, s33 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    s_add_i32 s32, s32, 16
; GFX11-NEXT:    s_getpc_b64 s[0:1]
; GFX11-NEXT:    s_add_u32 s0, s0, return_2xi16@gotpcrel32@lo+4
; GFX11-NEXT:    s_addc_u32 s1, s1, return_2xi16@gotpcrel32@hi+12
; GFX11-NEXT:    v_writelane_b32 v1, s30, 0
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT:    v_writelane_b32 v1, s31, 1
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_readlane_b32 s31, v1, 1
; GFX11-NEXT:    v_readlane_b32 s30, v1, 0
; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_load_b32 v1, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    s_add_i32 s32, s32, -16
; GFX11-NEXT:    s_mov_b32 s33, s2
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
entry:
  call amdgpu_gfx <2 x i16> @return_2xi16()
  ret void
}

; Returns the constant vector <i16 1, i16 2, i16 3> using the amdgpu_gfx
; calling convention. The expected code places the first two elements packed
; in v0 (0x20001) and the third element in v1 (3). gfx1100 gets its own check
; block because it emits both moves as a single v_dual_mov_b32, whereas gfx900
; and gfx1010 use two separate v_mov_b32_e32 instructions.
define amdgpu_gfx <3 x i16> @return_3xi16() #0 {
; GFX9-LABEL: return_3xi16:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x20001
; GFX9-NEXT:    v_mov_b32_e32 v1, 3
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: return_3xi16:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_mov_b32_e32 v0, 0x20001
; GFX10-NEXT:    v_mov_b32_e32 v1, 3
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: return_3xi16:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_dual_mov_b32 v0, 0x20001 :: v_dual_mov_b32 v1, 3
; GFX11-NEXT:    s_setpc_b64 s[30:31]
entry:
  ret <3 x i16> <i16 1, i16 2, i16 3>
}

; Calls @return_3xi16 with the amdgpu_gfx calling convention and discards the
; result. The expected call sequence matches the other call_* tests in this
; file except that the return address s30/s31 is kept in lanes 0 and 1 of v2
; instead of v1 (and v2 is the register spilled/reloaded around the call).
; NOTE(review): presumably v2 is chosen because the callee returns its
; <3 x i16> result in v0 and v1 -- confirm against the register allocator.
define amdgpu_gfx void @call_3xi16() #0 {
; GFX9-LABEL: call_3xi16:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b32 s36, s33
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_xor_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    s_addk_i32 s32, 0x400
; GFX9-NEXT:    s_getpc_b64 s[34:35]
; GFX9-NEXT:    s_add_u32 s34, s34, return_3xi16@gotpcrel32@lo+4
; GFX9-NEXT:    s_addc_u32 s35, s35, return_3xi16@gotpcrel32@hi+12
; GFX9-NEXT:    s_load_dwordx2 s[34:35], s[34:35], 0x0
; GFX9-NEXT:    v_writelane_b32 v2, s30, 0
; GFX9-NEXT:    v_writelane_b32 v2, s31, 1
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT:    v_readlane_b32 s31, v2, 1
; GFX9-NEXT:    v_readlane_b32 s30, v2, 0
; GFX9-NEXT:    s_xor_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_load_dword v2, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
; GFX9-NEXT:    s_mov_b32 s33, s36
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: call_3xi16:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_mov_b32 s36, s33
; GFX10-NEXT:    s_mov_b32 s33, s32
; GFX10-NEXT:    s_xor_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    s_addk_i32 s32, 0x200
; GFX10-NEXT:    s_getpc_b64 s[34:35]
; GFX10-NEXT:    s_add_u32 s34, s34, return_3xi16@gotpcrel32@lo+4
; GFX10-NEXT:    s_addc_u32 s35, s35, return_3xi16@gotpcrel32@hi+12
; GFX10-NEXT:    v_writelane_b32 v2, s30, 0
; GFX10-NEXT:    s_load_dwordx2 s[34:35], s[34:35], 0x0
; GFX10-NEXT:    v_writelane_b32 v2, s31, 1
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT:    v_readlane_b32 s31, v2, 1
; GFX10-NEXT:    v_readlane_b32 s30, v2, 0
; GFX10-NEXT:    s_xor_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_load_dword v2, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
; GFX10-NEXT:    s_mov_b32 s33, s36
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: call_3xi16:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    s_mov_b32 s2, s33
; GFX11-NEXT:    s_mov_b32 s33, s32
; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_store_b32 off, v2, s33 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    s_add_i32 s32, s32, 16
; GFX11-NEXT:    s_getpc_b64 s[0:1]
; GFX11-NEXT:    s_add_u32 s0, s0, return_3xi16@gotpcrel32@lo+4
; GFX11-NEXT:    s_addc_u32 s1, s1, return_3xi16@gotpcrel32@hi+12
; GFX11-NEXT:    v_writelane_b32 v2, s30, 0
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT:    v_writelane_b32 v2, s31, 1
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_readlane_b32 s31, v2, 1
; GFX11-NEXT:    v_readlane_b32 s30, v2, 0
; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_load_b32 v2, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    s_add_i32 s32, s32, -16
; GFX11-NEXT:    s_mov_b32 s33, s2
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
entry:
  call amdgpu_gfx <3 x i16> @return_3xi16()
  ret void
}

; Check that return values that do not fit in registers do not crash the compiler.

define amdgpu_gfx <512 x i32> @return_512xi32() #0 {
; GFX9-LABEL: return_512xi32:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v1, 0
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1020
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2044
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2040
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2036
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2032
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2028
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2024
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2020
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2016
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2012
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2008
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2004
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2000
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1996
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1992
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1988
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1984
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1980
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1976
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1972
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1968
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1964
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1960
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1956
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1952
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1948
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1944
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1940
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1936
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1932
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1928
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1924
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1920
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1916
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1912
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1908
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1904
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1900
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1896
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1892
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1888
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1884
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1880
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1876
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1872
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1868
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1864
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1860
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1856
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1852
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1848
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1844
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1840
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1836
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1832
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1828
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1824
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1820
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1816
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1812
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1808
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1804
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1800
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1796
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1792
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1788
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1784
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1780
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1776
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1772
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1768
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1764
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1760
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1756
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1752
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1748
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1744
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1740
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1736
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1732
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1728
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1724
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1720
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1716
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1712
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1708
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1704
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1700
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1696
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1692
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1688
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1684
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1680
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1676
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1672
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1668
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1664
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1660
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1656
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1652
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1648
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1644
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1640
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1636
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1632
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1628
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1624
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1620
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1616
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1612
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1608
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1604
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1600
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1596
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1592
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1588
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1584
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1580
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1576
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1572
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1568
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1564
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1560
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1556
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1552
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1548
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1544
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1540
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1536
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1532
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1528
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1524
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1520
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1516
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1512
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1508
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1504
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1500
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1496
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1492
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1488
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1484
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1480
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1476
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1472
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1468
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1464
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1460
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1456
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1452
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1448
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1444
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1440
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1436
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1432
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1428
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1424
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1420
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1416
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1412
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1408
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1404
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1400
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1396
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1392
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1388
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1384
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1380
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1376
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1372
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1368
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1364
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1360
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1356
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1352
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1348
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1344
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1340
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1336
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1332
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1328
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1324
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1320
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1316
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1312
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1308
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1304
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1300
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1296
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1292
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1288
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1284
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1280
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1276
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1272
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1268
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1264
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1260
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1256
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1252
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1248
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1244
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1240
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1236
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1232
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1228
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1224
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1220
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1216
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1212
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1208
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1204
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1200
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1196
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1192
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1188
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1184
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1180
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1176
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1172
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1168
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1164
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1160
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1156
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1152
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1148
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1144
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1140
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1136
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1132
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1128
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1124
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1120
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1116
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1112
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1108
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1104
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1100
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1096
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1092
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1088
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1084
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1080
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1076
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1072
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1068
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1064
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1060
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1056
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1052
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1048
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1044
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1040
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1036
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1032
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1028
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1024
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1016
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1012
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1008
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1004
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1000
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:996
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:992
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:988
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:984
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:980
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:976
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:972
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:968
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:964
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:960
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:956
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:952
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:948
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:944
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:940
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:936
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:932
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:928
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:924
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:920
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:916
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:912
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:908
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:904
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:900
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:896
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:892
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:888
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:884
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:880
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:876
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:872
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:868
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:864
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:860
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:856
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:852
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:848
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:844
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:840
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:836
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:832
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:828
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:824
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:820
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:816
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:812
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:808
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:804
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:800
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:796
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:792
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:788
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:784
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:780
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:776
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:772
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:768
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:764
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:760
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:756
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:752
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:748
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:744
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:740
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:736
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:732
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:728
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:724
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:720
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:716
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:712
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:708
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:704
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:700
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:696
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:692
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:688
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:684
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:680
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:676
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:672
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:668
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:664
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:660
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:656
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:652
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:648
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:644
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:640
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:636
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:632
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:628
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:624
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:620
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:616
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:612
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:608
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:604
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:600
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:596
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:592
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:588
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:584
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:580
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:576
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:572
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:568
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:564
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:560
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:556
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:552
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:548
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:544
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:540
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:536
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:532
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:528
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:524
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:520
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:516
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:512
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:508
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:504
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:500
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:496
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:492
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:488
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:484
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:480
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:476
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:472
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:468
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:464
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:460
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:456
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:452
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:448
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:444
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:440
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:436
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:432
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:428
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:424
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:420
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:416
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:412
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:408
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:404
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:400
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:396
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:392
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:388
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:384
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:380
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:376
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:372
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:368
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:364
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:360
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:356
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:352
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:348
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:344
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:340
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:336
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:332
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:328
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:324
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:320
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:316
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:312
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:308
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:304
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:300
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:296
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:292
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:288
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:284
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:280
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:276
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:272
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:268
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:264
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:260
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:256
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:252
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:248
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:244
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:240
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:236
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:232
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:228
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:224
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:220
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:216
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:212
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:208
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:204
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:200
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:196
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:192
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:188
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:184
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:180
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:176
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:172
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:168
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:164
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:160
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:156
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:152
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:148
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:144
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:140
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:136
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:132
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:128
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:124
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:120
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:116
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:112
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:108
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:104
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:100
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:96
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:92
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:88
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:84
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:80
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:76
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:72
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:68
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:64
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:60
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:56
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:52
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:48
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:44
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:40
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:36
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:32
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:28
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:24
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:20
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:8
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: return_512xi32:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1020
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2044
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2040
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2036
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2032
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2028
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2024
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2020
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2016
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2012
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2008
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2004
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2000
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1996
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1992
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1988
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1984
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1980
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1976
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1972
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1968
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1964
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1960
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1956
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1952
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1948
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1944
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1940
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1936
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1932
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1928
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1924
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1920
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1916
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1912
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1908
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1904
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1900
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1896
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1892
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1888
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1884
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1880
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1876
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1872
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1868
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1864
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1860
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1856
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1852
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1848
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1844
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1840
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1836
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1832
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1828
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1824
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1820
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1816
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1812
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1808
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1804
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1800
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1796
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1792
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1788
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1784
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1780
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1776
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1772
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1768
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1764
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1760
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1756
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1752
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1748
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1744
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1740
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1736
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1732
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1728
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1724
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1720
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1716
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1712
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1708
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1704
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1700
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1696
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1692
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1688
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1684
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1680
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1676
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1672
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1668
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1664
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1660
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1656
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1652
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1648
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1644
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1640
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1636
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1632
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1628
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1624
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1620
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1616
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1612
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1608
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1604
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1600
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1596
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1592
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1588
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1584
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1580
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1576
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1572
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1568
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1564
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1560
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1556
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1552
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1548
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1544
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1540
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1536
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1532
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1528
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1524
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1520
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1516
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1512
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1508
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1504
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1500
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1496
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1492
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1488
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1484
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1480
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1476
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1472
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1468
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1464
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1460
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1456
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1452
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1448
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1444
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1440
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1436
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1432
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1428
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1424
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1420
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1416
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1412
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1408
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1404
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1400
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1396
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1392
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1388
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1384
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1380
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1376
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1372
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1368
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1364
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1360
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1356
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1352
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1348
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1344
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1340
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1336
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1332
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1328
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1324
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1320
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1316
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1312
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1308
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1304
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1300
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1296
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1292
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1288
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1284
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1280
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1276
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1272
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1268
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1264
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1260
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1256
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1252
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1248
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1244
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1240
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1236
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1232
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1228
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1224
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1220
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1216
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1212
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1208
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1204
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1200
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1196
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1192
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1188
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1184
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1180
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1176
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1172
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1168
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1164
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1160
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1156
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1152
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1148
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1144
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1140
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1136
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1132
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1128
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1124
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1120
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1116
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1112
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1108
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1104
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1100
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1096
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1092
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1088
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1084
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1080
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1076
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1072
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1068
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1064
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1060
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1056
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1052
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1048
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1044
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1040
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1036
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1032
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1028
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1024
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1016
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1012
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1008
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1004
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1000
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:996
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:992
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:988
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:984
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:980
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:976
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:972
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:968
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:964
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:960
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:956
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:952
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:948
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:944
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:940
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:936
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:932
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:928
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:924
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:920
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:916
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:912
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:908
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:904
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:900
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:896
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:892
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:888
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:884
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:880
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:876
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:872
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:868
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:864
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:860
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:856
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:852
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:848
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:844
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:840
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:836
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:832
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:828
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:824
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:820
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:816
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:812
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:808
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:804
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:800
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:796
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:792
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:788
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:784
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:780
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:776
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:772
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:768
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:764
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:760
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:756
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:752
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:748
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:744
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:740
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:736
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:732
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:728
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:724
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:720
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:716
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:712
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:708
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:704
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:700
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:696
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:692
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:688
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:684
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:680
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:676
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:672
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:668
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:664
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:660
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:656
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:652
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:648
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:644
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:640
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:636
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:632
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:628
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:624
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:620
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:616
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:612
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:608
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:604
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:600
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:596
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:592
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:588
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:584
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:580
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:576
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:572
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:568
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:564
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:560
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:556
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:552
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:548
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:544
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:540
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:536
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:532
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:528
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:524
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:520
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:516
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:512
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:508
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:504
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:500
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:496
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:492
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:488
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:484
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:480
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:476
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:472
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:468
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:464
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:460
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:456
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:452
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:448
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:444
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:440
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:436
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:432
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:428
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:424
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:420
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:416
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:412
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:408
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:404
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:400
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:396
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:392
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:388
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:384
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:380
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:376
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:372
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:368
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:364
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:360
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:356
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:352
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:348
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:344
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:340
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:336
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:332
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:328
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:324
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:320
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:316
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:312
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:308
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:304
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:300
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:296
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:292
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:288
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:284
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:280
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:276
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:272
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:268
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:264
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:260
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:256
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:252
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:248
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:244
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:240
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:236
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:232
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:228
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:224
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:220
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:216
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:212
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:208
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:204
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:200
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:196
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:192
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:188
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:184
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:180
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:176
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:172
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:168
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:164
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:160
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:156
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:152
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:148
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:144
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:140
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:136
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:132
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:128
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:124
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:120
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:116
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:112
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:108
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:104
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:100
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:96
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:92
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:88
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:84
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:80
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:76
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:72
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:68
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:64
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:60
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:56
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:52
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:48
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:44
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:40
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:36
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:32
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:28
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:24
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:20
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:8
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: return_512xi32:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    s_mov_b32 s0, 0
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
; GFX11-NEXT:    s_mov_b32 s3, s0
; GFX11-NEXT:    s_mov_b32 s1, s0
; GFX11-NEXT:    s_mov_b32 s2, s0
; GFX11-NEXT:    v_dual_mov_b32 v4, s3 :: v_dual_mov_b32 v3, s2
; GFX11-NEXT:    v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    s_add_i32 s1, s0, 0x7f0
; GFX11-NEXT:    s_add_i32 s2, s0, 0x7e0
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s0
; GFX11-NEXT:    s_add_i32 s3, s0, 0x7d0
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s3
; GFX11-NEXT:    s_add_i32 s1, s0, 0x7c0
; GFX11-NEXT:    s_add_i32 s2, s0, 0x7b0
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x7a0
; GFX11-NEXT:    s_add_i32 s2, s0, 0x790
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x780
; GFX11-NEXT:    s_add_i32 s2, s0, 0x770
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x760
; GFX11-NEXT:    s_add_i32 s2, s0, 0x750
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x740
; GFX11-NEXT:    s_add_i32 s2, s0, 0x730
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x720
; GFX11-NEXT:    s_add_i32 s2, s0, 0x710
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x700
; GFX11-NEXT:    s_add_i32 s2, s0, 0x6f0
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x6e0
; GFX11-NEXT:    s_add_i32 s2, s0, 0x6d0
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x6c0
; GFX11-NEXT:    s_add_i32 s2, s0, 0x6b0
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x6a0
; GFX11-NEXT:    s_add_i32 s2, s0, 0x690
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x680
; GFX11-NEXT:    s_add_i32 s2, s0, 0x670
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x660
; GFX11-NEXT:    s_add_i32 s2, s0, 0x650
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x640
; GFX11-NEXT:    s_add_i32 s2, s0, 0x630
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x620
; GFX11-NEXT:    s_add_i32 s2, s0, 0x610
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x600
; GFX11-NEXT:    s_add_i32 s2, s0, 0x5f0
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x5e0
; GFX11-NEXT:    s_add_i32 s2, s0, 0x5d0
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x5c0
; GFX11-NEXT:    s_add_i32 s2, s0, 0x5b0
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x5a0
; GFX11-NEXT:    s_add_i32 s2, s0, 0x590
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x580
; GFX11-NEXT:    s_add_i32 s2, s0, 0x570
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x560
; GFX11-NEXT:    s_add_i32 s2, s0, 0x550
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x540
; GFX11-NEXT:    s_add_i32 s2, s0, 0x530
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x520
; GFX11-NEXT:    s_add_i32 s2, s0, 0x510
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x500
; GFX11-NEXT:    s_add_i32 s2, s0, 0x4f0
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x4e0
; GFX11-NEXT:    s_add_i32 s2, s0, 0x4d0
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x4c0
; GFX11-NEXT:    s_add_i32 s2, s0, 0x4b0
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x4a0
; GFX11-NEXT:    s_add_i32 s2, s0, 0x490
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x480
; GFX11-NEXT:    s_add_i32 s2, s0, 0x470
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x460
; GFX11-NEXT:    s_add_i32 s2, s0, 0x450
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x440
; GFX11-NEXT:    s_add_i32 s2, s0, 0x430
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x420
; GFX11-NEXT:    s_add_i32 s2, s0, 0x410
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x400
; GFX11-NEXT:    s_add_i32 s2, s0, 0x3f0
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x3e0
; GFX11-NEXT:    s_add_i32 s2, s0, 0x3d0
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x3c0
; GFX11-NEXT:    s_add_i32 s2, s0, 0x3b0
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x3a0
; GFX11-NEXT:    s_add_i32 s2, s0, 0x390
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x380
; GFX11-NEXT:    s_add_i32 s2, s0, 0x370
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x360
; GFX11-NEXT:    s_add_i32 s2, s0, 0x350
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x340
; GFX11-NEXT:    s_add_i32 s2, s0, 0x330
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x320
; GFX11-NEXT:    s_add_i32 s2, s0, 0x310
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x300
; GFX11-NEXT:    s_add_i32 s2, s0, 0x2f0
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x2e0
; GFX11-NEXT:    s_add_i32 s2, s0, 0x2d0
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x2c0
; GFX11-NEXT:    s_add_i32 s2, s0, 0x2b0
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x2a0
; GFX11-NEXT:    s_add_i32 s2, s0, 0x290
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x280
; GFX11-NEXT:    s_add_i32 s2, s0, 0x270
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x260
; GFX11-NEXT:    s_add_i32 s2, s0, 0x250
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x240
; GFX11-NEXT:    s_add_i32 s2, s0, 0x230
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x220
; GFX11-NEXT:    s_add_i32 s2, s0, 0x210
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x200
; GFX11-NEXT:    s_add_i32 s2, s0, 0x1f0
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x1e0
; GFX11-NEXT:    s_add_i32 s2, s0, 0x1d0
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x1c0
; GFX11-NEXT:    s_add_i32 s2, s0, 0x1b0
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x1a0
; GFX11-NEXT:    s_add_i32 s2, s0, 0x190
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x180
; GFX11-NEXT:    s_add_i32 s2, s0, 0x170
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x160
; GFX11-NEXT:    s_add_i32 s2, s0, 0x150
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x140
; GFX11-NEXT:    s_add_i32 s2, s0, 0x130
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x120
; GFX11-NEXT:    s_add_i32 s2, s0, 0x110
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x100
; GFX11-NEXT:    s_add_i32 s2, s0, 0xf0
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0xe0
; GFX11-NEXT:    s_add_i32 s2, s0, 0xd0
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0xc0
; GFX11-NEXT:    s_add_i32 s2, s0, 0xb0
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0xa0
; GFX11-NEXT:    s_add_i32 s2, s0, 0x90
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x80
; GFX11-NEXT:    s_add_i32 s2, s0, 0x70
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 0x60
; GFX11-NEXT:    s_add_i32 s2, s0, 0x50
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 64
; GFX11-NEXT:    s_add_i32 s2, s0, 48
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s2
; GFX11-NEXT:    s_add_i32 s1, s0, 32
; GFX11-NEXT:    s_add_i32 s0, s0, 16
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s1
; GFX11-NEXT:    scratch_store_b128 off, v[1:4], s0
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
entry:
  ret <512 x i32> zeroinitializer
}

; Caller-side test for a <512 x i32> return value under the amdgpu_gfx calling
; convention: calls @return_512xi32 and ignores the result.
;
; The assertions inside the function are autogenerated (see the NOTE at the top
; of the file); regenerate them with utils/update_llc_test_checks.py rather
; than editing them by hand.
;
; What the expected code pins down for each -mcpu in the RUN lines:
;   gfx900   s33 = (s32 + 0x1ffc0) & 0xfffe0000, s32 is bumped by 0x60000,
;            v0 = s33 >> 6 just before the call.
;   gfx1010  s33 = (s32 + 0xffe0) & 0xffff0000, s32 is bumped by 0x30000,
;            v0 = s33 >> 5 just before the call.
;   gfx1100  s33 = (s32 + 0x7ff) & 0xfffff800, s32 is bumped by 0x1800,
;            v0 = s33 just before the call.
; On every target the VGPR that holds s30/s31 across the call (v2, or v5 on
; gfx1100) is spilled to and reloaded from s33 offset:2048, and s32/s33 are
; restored before returning.
;
; NOTE(review): v0 presumably carries the address of the stack slot that
; receives the returned vector, and the first 2048 bytes at s33 are presumably
; that slot -- confirm against the callee and the calling-convention lowering.
define amdgpu_gfx void @call_512xi32() #0 {
; GFX9-LABEL: call_512xi32:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b32 s36, s33
; GFX9-NEXT:    s_add_i32 s33, s32, 0x1ffc0
; GFX9-NEXT:    s_and_b32 s33, s33, 0xfffe0000
; GFX9-NEXT:    s_xor_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:2048 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    s_add_i32 s32, s32, 0x60000
; GFX9-NEXT:    s_getpc_b64 s[34:35]
; GFX9-NEXT:    s_add_u32 s34, s34, return_512xi32@gotpcrel32@lo+4
; GFX9-NEXT:    s_addc_u32 s35, s35, return_512xi32@gotpcrel32@hi+12
; GFX9-NEXT:    s_load_dwordx2 s[34:35], s[34:35], 0x0
; GFX9-NEXT:    v_writelane_b32 v2, s30, 0
; GFX9-NEXT:    v_lshrrev_b32_e64 v0, 6, s33
; GFX9-NEXT:    v_writelane_b32 v2, s31, 1
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT:    v_readlane_b32 s31, v2, 1
; GFX9-NEXT:    v_readlane_b32 s30, v2, 0
; GFX9-NEXT:    s_xor_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_load_dword v2, off, s[0:3], s33 offset:2048 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    s_add_i32 s32, s32, 0xfffa0000
; GFX9-NEXT:    s_mov_b32 s33, s36
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: call_512xi32:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_mov_b32 s36, s33
; GFX10-NEXT:    s_add_i32 s33, s32, 0xffe0
; GFX10-NEXT:    s_and_b32 s33, s33, 0xffff0000
; GFX10-NEXT:    s_xor_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:2048 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    s_add_i32 s32, s32, 0x30000
; GFX10-NEXT:    s_getpc_b64 s[34:35]
; GFX10-NEXT:    s_add_u32 s34, s34, return_512xi32@gotpcrel32@lo+4
; GFX10-NEXT:    s_addc_u32 s35, s35, return_512xi32@gotpcrel32@hi+12
; GFX10-NEXT:    v_writelane_b32 v2, s30, 0
; GFX10-NEXT:    s_load_dwordx2 s[34:35], s[34:35], 0x0
; GFX10-NEXT:    v_lshrrev_b32_e64 v0, 5, s33
; GFX10-NEXT:    v_writelane_b32 v2, s31, 1
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT:    v_readlane_b32 s31, v2, 1
; GFX10-NEXT:    v_readlane_b32 s30, v2, 0
; GFX10-NEXT:    s_xor_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_load_dword v2, off, s[0:3], s33 offset:2048 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    s_add_i32 s32, s32, 0xfffd0000
; GFX10-NEXT:    s_mov_b32 s33, s36
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: call_512xi32:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    s_mov_b32 s34, s33
; GFX11-NEXT:    s_add_i32 s33, s32, 0x7ff
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    s_and_b32 s33, s33, 0xfffff800
; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_store_b32 off, v5, s33 offset:2048 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    s_addk_i32 s32, 0x1800
; GFX11-NEXT:    s_getpc_b64 s[0:1]
; GFX11-NEXT:    s_add_u32 s0, s0, return_512xi32@gotpcrel32@lo+4
; GFX11-NEXT:    s_addc_u32 s1, s1, return_512xi32@gotpcrel32@hi+12
; GFX11-NEXT:    v_writelane_b32 v5, s30, 0
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT:    v_mov_b32_e32 v0, s33
; GFX11-NEXT:    v_writelane_b32 v5, s31, 1
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_readlane_b32 s31, v5, 1
; GFX11-NEXT:    v_readlane_b32 s30, v5, 0
; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_load_b32 v5, off, s33 offset:2048 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    s_addk_i32 s32, 0xe800
; GFX11-NEXT:    s_mov_b32 s33, s34
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
entry:
  ; The call result is not bound to a name; only the call sequence is checked.
  call amdgpu_gfx <512 x i32> @return_512xi32()
  ret void
}

; Attribute group #0, referenced by the function definitions in this file
; (e.g. @call_512xi32): marks them nounwind.
attributes #0 = { nounwind }
